********************************************************************************
/*
REPLICATION CODE
MANUSCRIPT TITLE: Dissecting Diffusion: Tracing the Plurality of Factors that Shape Knowledge Diffusion
JOURNAL: Research Policy
AUTHORS: Paige Clayton, Lauren Lanahan, Andrew Nelson
AUTHOR LEAD FOR GATHERING RAW DATA: Andrew Nelson
AUTHOR LEAD FOR WRITING STATA CODE: Lauren Lanahan

COMMENT: Code for primary Tables and Figures reported below. Given the various units of analysis (refer to Table 2), we include a series of data files in various structures (i.e., individual-level, organization-level, output-level, etc.). The do file reflects the order of material as presented in the manuscript. Data and code for material presented in Appendix are available upon request. Reach out to Lauren Lanahan for more information. 
*/
********************************************************************************
* Location of the replication data. Every -use- command in this do-file builds
* its file name by appending a separator and a data-set name to $directory
* (e.g. "$directory/Total Unique Organizations.dta").
* NOTE(review): the placeholder below ends in "FILE.dta", but given how it is
* used it presumably has to be replaced by a FOLDER path with no trailing
* separator - confirm before running.
global directory "XXX_PATHWAY_/FILE.dta" // User will need to set the directory manually.

		* TABLE 1: Features of the Two Cases
{
*Authors report qualitative detail. The material is derived from in-depth case-study review of recombinant DNA (rDNA) and polymerase chain reaction (PCR). 
}
		* TABLE 2: Metrics of Diffusion
{
*Authors report functional form for variables. 
}

		* FIGURE 1: Total Unique Organizations
{
	* Plots, per panel (1-15), the number of organizations observed for the
	* first time, separately for PCR (core == 1) and rDNA (core == 0).
	clear all
	set more off
	use "$directory/Total Unique Organizations.dta"
	* Keep only the earliest record (by year) of each organization within a
	* technology; the sort order is stated explicitly in the by-prefix.
	bysort OrgCode core (year): keep if _n == 1
	gen neworg = 1
	drop if panel > 15
	* Per-panel counts; each variable is missing for the other technology.
	bysort panel: egen PCR_NewOrg = total(neworg) if core == 1
	bysort panel: egen rDNA_NewOrg = total(neworg) if core == 0
	twoway ///
		(line PCR_NewOrg panel, lwidth(thick) color(blue) mcolor(black) msymbol(S)) ///
		(line rDNA_NewOrg panel, mcolor(gs10) lwidth(medthick) color(red) msymbol(S)), ///
		yscale(range(0 300)) ylabel(0(50)300) ytitle("Unique Organizations") ///
		xscale(range(1 15)) xlabel(1(1)15) xtitle("Panel") ///
		graphregion(fcolor(white)) ///
		legend(label(1 "PCR") label(2 "rDNA")) ///
		legend(subtitle("Publication & Patent Application Activity"))
}

		* TABLE 3: Descriptive Statistics

*** Table 3 Panel A Raw Statistics
{
	* Total Unique Organizations: count of distinct organizations per
	* technology (core) whose first record falls in panel <= 10.
	clear all
	set more off
	* Forward slash so the path resolves on Windows, macOS and Linux alike.
	use "$directory/Total Unique Organizations.dta"
	* Keep the earliest record (by year) of each organization per technology.
	bysort OrgCode core (year): keep if _n == 1
	tab core if panel <= 10
}
*** Table 3 Panel B Raw Statistics
{
	* All paths use forward slashes so they resolve on every platform.

	* Output: number of publication records per technology (core), panel <= 10
	clear all
	set more off
	use "$directory/Publications.dta"
	tab core if panel <= 10

	* Scientists (Authors): distinct author names per technology, counted at
	* their earliest record (by year)
	clear all
	set more off
	use "$directory/Publication Authors.dta"
	bysort NamesForSub core (year): keep if _n == 1
	tab core if panel <= 10

	* Organizations: distinct organizations per technology, counted at their
	* earliest record (by year)
	clear all
	set more off
	use "$directory/Publication Organizations.dta"
	bysort OrgCode core (year): keep if _n == 1
	tab core if panel <= 10
}
*** Table 3 Panel B Comparison of Means
{
	* Unequal-variance t-tests of each variable in $variables between the two
	* technologies (grouping variable $x = core), restricted to panel <= 10.
	* Results are gathered in matrix q1: one row per variable; columns hold the
	* mean of the first group (core == 0, rDNA), the mean of the second group
	* (core == 1, PCR), the t statistic and the two-sided p-value.
	clear all
	set more off
	use "$directory/Publications.dta"
	sort PubID
	global x core
	global variables only_university only_firm joint_orgtype Journal_1pct
	* Size the results matrix from the variable list instead of hard-coding it.
	local nvars : word count $variables
	matrix q1 = J(`nvars', 4, .)
	local row = 0
	* The loop variable is named v so it cannot be confused with global x.
	foreach v of global variables {
		local ++row
		di "T-test of means for `v'"
		ttest `v' if panel <= 10, by($x) unequal
		matrix q1[`row', 1] = r(mu_1)
		matrix q1[`row', 2] = r(mu_2)
		matrix q1[`row', 3] = r(t)
		matrix q1[`row', 4] = r(p)
	}
	matrix colnames q1 = rDNA PCR t-stat p-value
	matrix rownames q1 = OnlyUni OnlyFirm Joint Top1pct
	mat list q1
}
*** Table 3 Panel C Raw Statistics
{
	* All paths use forward slashes so they resolve on every platform.

	* Output Count: number of patent records per technology (core), fpanel <= 10
	clear all
	set more off
	use "$directory/Patents.dta"
	tab core if fpanel <= 10

	* Scientists (Inventors): distinct inventor names per technology, counted
	* at their earliest record (by fyear)
	clear all
	set more off
	use "$directory/Patent Inventors.dta"
	bysort NamesForSub core (fyear): keep if _n == 1
	tab core if fpanel <= 10

	* Organizations (Assignees): distinct organizations per technology, counted
	* at their earliest record (by fyear)
	clear all
	set more off
	use "$directory/Patent Organizations.dta"
	bysort OrgCode core (fyear): keep if _n == 1
	tab core if fpanel <= 10
}
*** Table 3 Panel C Comparison of Means
{
	* Unequal-variance t-tests of each variable in $variables between the two
	* technologies (grouping variable $x = core), restricted to fpanel <= 10.
	* Results are gathered in matrix q1: one row per variable; columns hold the
	* mean of the first group (core == 0, rDNA), the mean of the second group
	* (core == 1, PCR), the t statistic and the two-sided p-value.
	clear all
	set more off
	use "$directory/Patent Organizations a.dta"
	sort PatID
	global x core
	global variables only_university only_firm joint_orgtype
	* Size the results matrix from the variable list instead of hard-coding it.
	local nvars : word count $variables
	matrix q1 = J(`nvars', 4, .)
	local row = 0
	* The loop variable is named v so it cannot be confused with global x.
	foreach v of global variables {
		local ++row
		di "T-test of means for `v'"
		ttest `v' if fpanel <= 10, by($x) unequal
		matrix q1[`row', 1] = r(mu_1)
		matrix q1[`row', 2] = r(mu_2)
		matrix q1[`row', 3] = r(t)
		matrix q1[`row', 4] = r(p)
	}
	matrix colnames q1 = rDNA PCR t-stat p-value
	matrix rownames q1 = OnlyUni OnlyFirm Joint
	mat list q1
}

		* FIGURE 2: Scale of Activity

*** Figure 2 Panel A (Publications: Publication Count)
{
	* Per-panel (1-15) totals of pub, drawn separately for PCR (core == 1)
	* and rDNA (core == 0).
	clear all
	set more off
	* Forward slash keeps the path portable across operating systems.
	use "$directory/Publications.dta"
	drop if panel > 15
	bysort panel: egen PCRpub = total(pub) if core == 1
	bysort panel: egen rDNApub = total(pub) if core == 0
	twoway ///
		(line PCRpub panel, color(blue) mcolor(black) lwidth(thick) msymbol(S)) ///
		(line rDNApub panel, color(red) mcolor(gs10) lwidth(medthick) msymbol(S)), ///
		yscale(range(0 800)) ylabel(0(100)800) ytitle("Publication Count") ///
		xscale(range(1 15)) xlabel(1(1)15) xtitle("Panel") ///
		graphregion(fcolor(white)) ///
		legend(label(1 "PCR") label(2 "rDNA")) ///
		legend(subtitle("Publication Trends"))
}
*** Figure 2 Panel B (Publications: Unique Organizations)
{
	* Per-panel (1-15) number of organizations appearing for the first time,
	* separately for PCR (core == 1) and rDNA (core == 0).
	clear all
	set more off
	* Forward slash keeps the path portable across operating systems.
	use "$directory/Publication Organizations.dta"
	* Keep the earliest record (by year) of each organization per technology.
	bysort OrgCode core (year): keep if _n == 1
	gen neworg = 1
	drop if panel > 15
	bysort panel: egen PCR_NewOrg = total(neworg) if core == 1
	bysort panel: egen rDNA_NewOrg = total(neworg) if core == 0
	twoway ///
		(line PCR_NewOrg panel, lwidth(thick) color(blue) mcolor(black) msymbol(S)) ///
		(line rDNA_NewOrg panel, mcolor(gs10) color(red) lwidth(medthick) msymbol(S)), ///
		yscale(range(0 300)) ylabel(0(50)300) ytitle("Unique Organizations") ///
		xscale(range(1 15)) xlabel(1(1)15) xtitle("Panel") ///
		graphregion(fcolor(white)) ///
		legend(label(1 "PCR") label(2 "rDNA")) ///
		legend(subtitle("Publication Trends"))
}
*** Figure 2 Panel C (Publications: Mean Distance (km) Per Publication to Origin)
{
	* Linear fit (with confidence band) of the per-publication mean of Dist on
	* panel, drawn separately for PCR (core == 1) and rDNA (core == 0).
	clear all
	set more off
	* Forward slash keeps the path portable across operating systems.
	use "$directory/Publication Distance.dta"
	bysort PubID: egen mean_Dist = mean(Dist)
	* Keep the num == 1 row of each publication (presumably one row per
	* publication - confirm against the data).
	keep if num == 1
	drop if panel > 15
	* grstyle is a community-contributed package and must be installed
	* beforehand (e.g. ssc install grstyle, together with its dependencies).
	grstyle init
	grstyle set plain
	twoway ///
		(lfitci mean_Dist panel if core == 1, ciplot(rline) lwidth(thick) ///
			alpattern(longdash) yscale(range(2500 8000)) ylabel(3000(1000)8000) ///
			ytitle("Mean Distance (km) Per Publication to Origin") lcolor(blue) ///
			legend(label(2 "PCR")) xscale(range(1 15)) xlabel(1(1)15) ///
			xtitle("Panel") graphregion(fcolor(white)) ///
			legend(subtitle("Publication Trends"))) ///
		(lfitci mean_Dist panel if core == 0, ciplot(rline) lcolor(red) ///
			lwidth(medthick) alpattern(dash) legend(label(4 "rDNA")))
}
*** Figure 2 Panel A (Patents: Patent Count)
{
	* Per-panel (fpanel 1-15) totals of pat, drawn separately for PCR
	* (core == 1) and rDNA (core == 0).
	clear all
	set more off
	* Forward slash keeps the path portable across operating systems.
	use "$directory/Patents.dta"
	drop if fpanel > 15
	bysort fpanel: egen PCRpat = total(pat) if core == 1
	bysort fpanel: egen rDNApat = total(pat) if core == 0
	twoway ///
		(line PCRpat fpanel, lwidth(thick) color(blue) mcolor(black) msymbol(S)) ///
		(line rDNApat fpanel, lwidth(medthick) color(red) mcolor(gs10) msymbol(S)), ///
		yscale(range(0 500)) ylabel(0(100)500) ytitle("Patent Count") ///
		xscale(range(1 15)) xlabel(1(1)15) xtitle("Panel") ///
		graphregion(fcolor(white)) ///
		legend(label(1 "PCR") label(2 "rDNA")) ///
		legend(subtitle("Patent Application Trends"))
}
*** Figure 2 Panel B (Patents: Unique Assignees)
{
	* Per-panel (fpanel 1-15) number of organizations appearing for the first
	* time, separately for PCR (core == 1) and rDNA (core == 0).
	clear all
	set more off
	* Forward slash keeps the path portable across operating systems.
	use "$directory/Patent Organizations.dta"
	* Keep the earliest record (by fyear) of each organization per technology.
	bysort OrgCode core (fyear): keep if _n == 1
	gen neworg = 1
	drop if fpanel > 15
	bysort fpanel: egen PCR_NewOrg = total(neworg) if core == 1
	bysort fpanel: egen rDNA_NewOrg = total(neworg) if core == 0
	twoway ///
		(line PCR_NewOrg fpanel, lwidth(thick) color(blue) mcolor(black) msymbol(S)) ///
		(line rDNA_NewOrg fpanel, mcolor(gs10) lwidth(medthick) color(red) msymbol(S)), ///
		yscale(range(0 60)) ylabel(0(10)60) ytitle("Unique Assignees") ///
		xscale(range(1 15)) xlabel(1(1)15) xtitle("Panel") ///
		graphregion(fcolor(white)) ///
		legend(label(1 "PCR") label(2 "rDNA")) ///
		legend(subtitle("Patent Application Trends"))
}
*** Figure 2 Panel C (Patents: Mean Distance (km) Per Patent to Origin)
{
	* Linear fit (with confidence band) of the per-patent mean of Dist on
	* fpanel, drawn separately for PCR (core == 1) and rDNA (core == 0).
	clear all
	set more off
	* Forward slash keeps the path portable across operating systems.
	use "$directory/Patent Distance.dta"
	bysort PatID: egen mean_Dist = mean(Dist)
	* Keep the num == 1 row of each patent (presumably one row per patent -
	* confirm against the data).
	keep if num == 1
	drop if fpanel > 15
	lab var fpanel "Panel"
	* grstyle is a community-contributed package and must be installed
	* beforehand (e.g. ssc install grstyle, together with its dependencies).
	grstyle init
	grstyle set plain
	twoway ///
		(lfitci mean_Dist fpanel if core == 1, ciplot(rline) color(blue) ///
			lcolor(navy) lwidth(thick) alpattern(longdash) ///
			yscale(range(2000 5500)) ylabel(2000(1000)5500) ///
			ytitle("Mean Distance (km) Per Patent to Origin") lcolor(blue) ///
			legend(label(2 "PCR")) xscale(range(1 15)) xlabel(1(1)15) ///
			xtitle("Panel") graphregion(fcolor(white)) ///
			legend(subtitle("Patent Application Trends"))) ///
		(lfitci mean_Dist fpanel if core == 0, ciplot(rline) lcolor(red) ///
			color(red) lwidth(medthick) alpattern(dash) legend(label(4 "rDNA")))
}

		* FIGURE 3: Scope of Knowledge Diffusion

*** Figure 3 Panel A (Publications: Unique Concepts)
{
	* Per-panel (1-15) totals of topic, drawn separately for PCR (core == 1)
	* and rDNA (core == 0).
	clear all
	set more off
	* Forward slash keeps the path portable across operating systems.
	use "$directory/Publication Concepts a.dta"
	drop if panel > 15
	bysort panel: egen PCRtopic = total(topic) if core == 1
	bysort panel: egen rDNAtopic = total(topic) if core == 0
	twoway ///
		(line PCRtopic panel, lwidth(thick) color(blue) mcolor(black) msymbol(S)) ///
		(line rDNAtopic panel, mcolor(gs10) lwidth(medthick) color(red) msymbol(S)), ///
		yscale(range(0 35)) ylabel(0(5)35) ytitle("Unique Concepts") ///
		xscale(range(1 15)) xlabel(1(1)15) xtitle("Panel") ///
		graphregion(fcolor(white)) ///
		legend(label(1 "PCR") label(2 "rDNA")) ///
		legend(subtitle("Publication Trends"))
}
*** Figure 3 Panel B (Publications: Mean Range of Concepts)
{
	* Linear fit (with confidence band) of the per-publication maximum of num
	* on panel, drawn separately for PCR (core == 1) and rDNA (core == 0).
	clear all
	set more off
	* Forward slash keeps the path portable across operating systems.
	use "$directory/Publication Concepts b.dta"
	* The largest num within a publication is used as its concept count.
	bysort PubID (num): egen max_concept = max(num)
	keep if num == 1
	drop if panel > 15
	* grstyle is a community-contributed package and must be installed
	* beforehand (e.g. ssc install grstyle, together with its dependencies).
	grstyle init
	grstyle set plain
	lab var panel "Panel"
	twoway ///
		(lfitci max_concept panel if core == 1, ciplot(rline) color(blue) ///
			lwidth(thick) alpattern(longdash) yscale(range(5 15)) ///
			ylabel(5(5)15) ytitle("Mean Range of Concepts") lcolor(blue) ///
			legend(label(2 "PCR")) xscale(range(1 15)) xlabel(1(1)15) ///
			xtitle("Panel") graphregion(fcolor(white)) ///
			legend(subtitle("Publication Trends"))) ///
		(lfitci max_concept panel if core == 0, ciplot(rline) lcolor(red) ///
			lwidth(medthick) alpattern(dash) legend(label(4 "rDNA")))
}
*** Figure 3 Panel A (Patents: Unique US Classes)
{
	* Per-panel (fpanel 1-15) totals of topic, drawn separately for PCR
	* (core == 1) and rDNA (core == 0).
	clear all
	set more off
	* Forward slash keeps the path portable across operating systems.
	use "$directory/Patent Classes a.dta"
	drop if fpanel > 15
	bysort fpanel: egen PCRtopic = total(topic) if core == 1
	bysort fpanel: egen rDNAtopic = total(topic) if core == 0
	twoway ///
		(line PCRtopic fpanel, lwidth(thick) color(blue) mcolor(black) msymbol(S)) ///
		(line rDNAtopic fpanel, mcolor(gs10) lwidth(medthick) color(red) msymbol(S)), ///
		yscale(range(0 200)) ylabel(0(50)200) ytitle("Unique US Classes") ///
		xscale(range(1 15)) xlabel(1(1)15) xtitle("Panel") ///
		graphregion(fcolor(white)) ///
		legend(label(1 "PCR") label(2 "rDNA")) ///
		legend(subtitle("Patent Application Trends"))
}
*** Figure 3 Panel B (Patents: Mean Range of Classes)
{
	* Linear fit (with confidence band) of the per-patent maximum of num on
	* fpanel, drawn separately for PCR (core == 1) and rDNA (core == 0).
	clear all
	set more off
	* Forward slash keeps the path portable across operating systems.
	use "$directory/Patent Classes b.dta"
	* The largest num within a patent is used as its class count.
	bysort PatID (num): egen max_class = max(num)
	keep if num == 1
	drop if fpanel > 15
	* grstyle is a community-contributed package and must be installed
	* beforehand (e.g. ssc install grstyle, together with its dependencies).
	grstyle init
	grstyle set plain
	lab var fpanel "Panel"
	twoway ///
		(lfitci max_class fpanel if core == 1, ciplot(rline) lwidth(thick) ///
			color(blue) alpattern(longdash) yscale(range(4 9)) ylabel(4(1)9) ///
			ytitle("Mean Range of Classes") lcolor(blue) ///
			legend(label(2 "PCR")) xscale(range(1 15)) xlabel(1(1)15) ///
			xtitle("Panel") graphregion(fcolor(white)) ///
			legend(subtitle("Patent Application Trends"))) ///
		(lfitci max_class fpanel if core == 0, ciplot(rline) lcolor(red) ///
			lwidth(medthick) color(red) alpattern(dash) legend(label(4 "rDNA")))
}

		* FIGURE 4: Nature of Research Activity
		
*** Figure 4 Panel A (Publications: Mean Coauthors Per Publication)
{
	* Linear fit (with confidence band) of the per-publication maximum of num
	* on panel, drawn separately for PCR (core == 1) and rDNA (core == 0).
	clear all
	set more off
	* Forward slash keeps the path portable across operating systems.
	use "$directory/Publication Authors.dta"
	* The largest num within a publication is used as its author count.
	bysort PubID: egen max_auth = max(num)
	keep if num == 1
	drop if panel > 15
	* grstyle is a community-contributed package and must be installed
	* beforehand (e.g. ssc install grstyle, together with its dependencies).
	grstyle init
	grstyle set plain
	lab var panel "Panel"
	twoway ///
		(lfitci max_auth panel if core == 1, ciplot(rline) color(blue) ///
			lwidth(thick) alpattern(longdash) yscale(range(2 5.5)) ///
			ylabel(2(0.5)5.5) ytitle("Mean Coauthors Per Publication") ///
			lcolor(blue) legend(label(2 "PCR")) xscale(range(1 15)) ///
			xlabel(1(1)15) xtitle("Panel") graphregion(fcolor(white)) ///
			legend(subtitle("Publication Trends"))) ///
		(lfitci max_auth panel if core == 0, ciplot(rline) lcolor(red) ///
			color(red) lwidth(medthick) alpattern(dash) legend(label(4 "rDNA")))
}
*** Figure 4 Panel B (Publications: New Entrants)
{
	* Per-panel (1-15) totals of PCR_isolate among PCR records (core == 1)
	* and of rDNA_isolate among rDNA records (core == 0).
	clear all
	set more off
	* Forward slash keeps the path portable across operating systems.
	use "$directory/Publications.dta"
	drop if panel > 15
	bysort panel: egen PCRisolate = total(PCR_isolate) if core == 1
	bysort panel: egen rDNAisolate = total(rDNA_isolate) if core == 0
	twoway ///
		(line PCRisolate panel, mcolor(black) lwidth(thick) color(blue) msymbol(S)) ///
		(line rDNAisolate panel, mcolor(gs10) lwidth(medthick) color(red) msymbol(S)), ///
		yscale(range(0 350)) ylabel(0(50)350) ytitle("New Entrants") ///
		xscale(range(1 15)) xlabel(1(1)15) xtitle("Panel") ///
		graphregion(fcolor(white)) ///
		legend(label(1 "PCR") label(2 "rDNA")) ///
		legend(subtitle("Publication Trends"))
}
*** Figure 4 Panel A (Patents: Mean Co-inventors Per Patent)
{
	* Linear fit (with confidence band) of the per-patent maximum of num on
	* fpanel, drawn separately for PCR (core == 1) and rDNA (core == 0).
	clear all
	set more off
	* Forward slash keeps the path portable across operating systems.
	use "$directory/Patent Inventors.dta"
	* The largest num within a patent is used as its inventor count.
	bysort PatID: egen max_inventor = max(num)
	keep if num == 1
	drop if fpanel > 15
	* grstyle is a community-contributed package and must be installed
	* beforehand (e.g. ssc install grstyle, together with its dependencies).
	grstyle init
	grstyle set plain
	lab var fpanel "Panel"
	twoway ///
		(lfitci max_inventor fpanel if core == 1, ciplot(rline) lwidth(thick) ///
			alpattern(longdash) yscale(range(2 3.5)) ylabel(2(0.5)3.5) ///
			ytitle("Mean Co-inventors Per Patent") lcolor(blue) ///
			xscale(range(1 15)) xlabel(1(1)15) xtitle("Panel") ///
			graphregion(fcolor(white)) legend(label(2 "PCR")) ///
			legend(subtitle("Patent Application Trends"))) ///
		(lfitci max_inventor fpanel if core == 0, ciplot(rline) lcolor(red) ///
			lwidth(medthick) alpattern(dash) legend(label(4 "rDNA")))
}
*** Figure 4 Panel B (Patents: New Entrants)
{
	* Per-panel (fpanel 1-15) totals of PCR_isolate among PCR records
	* (core == 1) and of rDNA_isolate among rDNA records (core == 0).
	clear all
	set more off
	* Forward slash keeps the path portable across operating systems.
	use "$directory/Patent Organizations a.dta"
	drop if fpanel > 15
	bysort fpanel: egen PCRisolate = total(PCR_isolate) if core == 1
	bysort fpanel: egen rDNAisolate = total(rDNA_isolate) if core == 0
	twoway ///
		(line PCRisolate fpanel, lwidth(thick) color(blue) mcolor(black) msymbol(S)) ///
		(line rDNAisolate fpanel, mcolor(gs10) lwidth(medthick) color(red) msymbol(S)), ///
		yscale(range(0 175)) ylabel(0(25)175) ytitle("New Entrants") ///
		xscale(range(1 15)) xlabel(1(1)15) xtitle("Panel") ///
		graphregion(fcolor(white)) ///
		legend(label(1 "PCR") label(2 "rDNA")) ///
		legend(subtitle("Patent Application Trends"))
}

		* TABLE 4: Comparison of Means - Joint Scientists
* Load the joint-scientist data once; both Table 4 panels below work on this
* data set inside preserve/restore.
clear all
set more off
* Forward slash keeps the path portable across operating systems.
use "$directory/Joint Scientists.dta"

*** Table 4 Panel rDNA
{
	* Unequal-variance t-tests, among rDNA records (core == 0), comparing
	* records with _merge == 3 (match = 1) against all others (match = 0).
	* Matrix q1 holds one row per tested variable: mean of the match == 0
	* group, mean of the match == 1 group, t statistic, two-sided p-value.
	preserve
	gen match = (_merge == 3)
	global x match
	set more off
	global variables pub_counter panel pat_counter fpanel
	matrix q1 = J(4, 4, .)
	local row = 0
	foreach v of global variables {
		local ++row
		set more off
		di "T-test of means for `v'"
		ttest `v' if core == 0, by($x) unequal // rDNA
		matrix q1[`row', 1] = r(mu_1)
		matrix q1[`row', 2] = r(mu_2)
		matrix q1[`row', 3] = r(t)
		matrix q1[`row', 4] = r(p)
	}
	matrix colnames q1 = noMatch Match t-stat p-value
	matrix rownames q1 = PubCounter PubPanel PatCounter PatPanel
	mat list q1
	restore
}
*** Table 4 Panel PCR
{
	* Unequal-variance t-tests, among PCR records (core == 1), comparing
	* records with _merge == 3 (match = 1) against all others (match = 0).
	* Matrix q1 holds one row per tested variable: mean of the match == 0
	* group, mean of the match == 1 group, t statistic, two-sided p-value.
	preserve
	gen match = (_merge == 3)
	global x match
	set more off
	global variables pub_counter panel pat_counter fpanel
	matrix q1 = J(4, 4, .)
	local row = 0
	foreach v of global variables {
		local ++row
		set more off
		di "T-test of means for `v'"
		ttest `v' if core == 1, by($x) unequal // PCR
		matrix q1[`row', 1] = r(mu_1)
		matrix q1[`row', 2] = r(mu_2)
		matrix q1[`row', 3] = r(t)
		matrix q1[`row', 4] = r(p)
	}
	matrix colnames q1 = noMatch Match t-stat p-value
	matrix rownames q1 = PubCounter PubPanel PatCounter PatPanel
	mat list q1
	restore
}

		* TABLE 5: Summary of Results 
{
*Authors rely on prior results to inform whether rDNA or PCR leads in diffusion rate for each metric. 
}

		* FIGURE 5: Proportion of Total (10-year Panel) Activity

*** Figure 5 Panel A (Publications: Proportion of Publications)
{
	* For each technology, the share of its 10-panel total of pub that falls
	* in each panel (PCR: core == 1, rDNA: core == 0).
	clear all
	set more off
	* Forward slash keeps the path portable across operating systems.
	use "$directory/Publications.dta"
	drop if panel > 10
	* Technology-wide totals (denominators)
	egen PCR_tot = total(pub) if core == 1
	egen rDNA_tot = total(pub) if core == 0
	* Per-panel totals (numerators)
	bysort panel: egen PCR_yr_tot = total(pub) if core == 1
	bysort panel: egen rDNA_yr_tot = total(pub) if core == 0
	gen PCR_pct = PCR_yr_tot/PCR_tot
	gen rDNA_pct = rDNA_yr_tot/rDNA_tot
	* The shares are constant within panel, so the mean reduces to one row
	* per panel.
	collapse (mean) PCR_pct rDNA_pct, by(panel)
	twoway ///
		(line PCR_pct panel, mcolor(black) color(blue) lwidth(thick) msymbol(S)) ///
		(line rDNA_pct panel, mcolor(gs10) color(red) lwidth(medthick) msymbol(S)), ///
		yscale(range(0 0.2)) ylabel(0(.05)0.2) ytitle("Proportion of Publications") ///
		xscale(range(1 10)) xlabel(1(1)10) xtitle("Panel") ///
		graphregion(fcolor(white)) ///
		legend(label(1 "PCR") label(2 "rDNA")) ///
		legend(subtitle("Publication Trend"))
}
*** Figure 5 Panel B (Publications: Proportion of Unique Organizations)
{
	* For each technology, the share of its 10-panel total of first-time
	* organizations that falls in each panel (PCR: core == 1, rDNA: core == 0).
	clear all
	set more off
	* Forward slash keeps the path portable across operating systems.
	use "$directory/Publication Organizations.dta"
	* Keep the earliest record (by year) of each organization per technology.
	bysort OrgCode core (year): keep if _n == 1
	gen neworg = 1
	drop if panel > 10
	* Technology-wide totals (denominators)
	egen PCR_tot = total(neworg) if core == 1
	egen rDNA_tot = total(neworg) if core == 0
	* Per-panel totals (numerators)
	bysort panel: egen PCR_yr_tot = total(neworg) if core == 1
	bysort panel: egen rDNA_yr_tot = total(neworg) if core == 0
	gen PCR_pct = PCR_yr_tot/PCR_tot
	gen rDNA_pct = rDNA_yr_tot/rDNA_tot
	* The shares are constant within panel, so the mean reduces to one row
	* per panel.
	collapse (mean) PCR_pct rDNA_pct, by(panel)
	twoway ///
		(line PCR_pct panel, lwidth(thick) color(blue) mcolor(black) msymbol(S)) ///
		(line rDNA_pct panel, lwidth(medthick) color(red) mcolor(gs10) msymbol(S)), ///
		yscale(range(0 0.2)) ylabel(0(.05)0.2) ///
		ytitle("Proportion of Unique Organizations") ///
		xscale(range(1 10)) xlabel(1(1)10) xtitle("Panel") ///
		graphregion(fcolor(white)) ///
		legend(label(1 "PCR") label(2 "rDNA")) ///
		legend(subtitle("Publication Trends"))
}
*** Figure 5 Panel C (Publications: Proportion of Unique Concepts)
{
	* For each technology, the share of its 10-panel total of topic that
	* falls in each panel (PCR: core == 1, rDNA: core == 0).
	clear all
	set more off
	* Forward slash keeps the path portable across operating systems.
	use "$directory/Publication Concepts a.dta"
	drop if panel > 10
	* Technology-wide totals (denominators)
	egen PCR_tot = total(topic) if core == 1
	egen rDNA_tot = total(topic) if core == 0
	* Per-panel totals (numerators)
	bysort panel: egen PCR_yr_tot = total(topic) if core == 1
	bysort panel: egen rDNA_yr_tot = total(topic) if core == 0
	gen PCR_pct = PCR_yr_tot/PCR_tot
	gen rDNA_pct = rDNA_yr_tot/rDNA_tot
	* The shares are constant within panel, so the mean reduces to one row
	* per panel.
	collapse (mean) PCR_pct rDNA_pct, by(panel)
	twoway ///
		(line PCR_pct panel, lwidth(thick) color(blue) mcolor(black) msymbol(S)) ///
		(line rDNA_pct panel, lwidth(medthick) color(red) mcolor(gs10) msymbol(S)), ///
		yscale(range(0 0.25)) ylabel(0(.05)0.25) ///
		ytitle("Proportion of Unique Concepts") ///
		xscale(range(1 10)) xlabel(1(1)10) xtitle("Panel") ///
		graphregion(fcolor(white)) ///
		legend(label(1 "PCR") label(2 "rDNA")) ///
		legend(subtitle("Publication Trends"))
}
*** Figure 5 Panel A (Patents: Proportion of Patents)
{
	* For each technology, the share of its 10-panel total of pat that falls
	* in each fpanel (PCR: core == 1, rDNA: core == 0).
	clear all
	set more off
	* Forward slash keeps the path portable across operating systems.
	use "$directory/Patents.dta"
	drop if fpanel > 10
	* Technology-wide totals (denominators)
	egen PCR_tot = total(pat) if core == 1
	egen rDNA_tot = total(pat) if core == 0
	* Per-panel totals (numerators)
	bysort fpanel: egen PCR_yr_tot = total(pat) if core == 1
	bysort fpanel: egen rDNA_yr_tot = total(pat) if core == 0
	gen PCR_pct = PCR_yr_tot/PCR_tot
	gen rDNA_pct = rDNA_yr_tot/rDNA_tot
	* The shares are constant within fpanel, so the mean reduces to one row
	* per fpanel.
	collapse (mean) PCR_pct rDNA_pct, by(fpanel)
	twoway ///
		(line PCR_pct fpanel, lwidth(thick) color(blue) mcolor(black) msymbol(S)) ///
		(line rDNA_pct fpanel, mcolor(gs10) lwidth(medthick) color(red) msymbol(S)), ///
		yscale(range(0 0.4)) ylabel(0(.1)0.4) ytitle("Proportion of Patents") ///
		xscale(range(1 10)) xlabel(1(1)10) xtitle("Panel") ///
		graphregion(fcolor(white)) ///
		legend(label(1 "PCR") label(2 "rDNA")) ///
		legend(subtitle("Patent Application Trends"))
}
*** Figure 5 Panel B (Patents: Proportion of Unique Assignees)
{
	* For each technology, the share of its 10-panel total of first-time
	* organizations that falls in each fpanel (PCR: core == 1, rDNA: core == 0).
	clear all
	set more off
	* Forward slash keeps the path portable across operating systems.
	use "$directory/Patent Organizations.dta"
	* Keep the earliest record (by fyear) of each organization per technology.
	bysort OrgCode core (fyear): keep if _n == 1
	gen neworg = 1
	drop if fpanel > 10
	* Technology-wide totals (denominators)
	egen PCR_tot = total(neworg) if core == 1
	egen rDNA_tot = total(neworg) if core == 0
	* Per-panel totals (numerators)
	bysort fpanel: egen PCR_yr_tot = total(neworg) if core == 1
	bysort fpanel: egen rDNA_yr_tot = total(neworg) if core == 0
	gen PCR_pct = PCR_yr_tot/PCR_tot
	gen rDNA_pct = rDNA_yr_tot/rDNA_tot
	* The shares are constant within fpanel, so the mean reduces to one row
	* per fpanel.
	collapse (mean) PCR_pct rDNA_pct, by(fpanel)
	twoway ///
		(line PCR_pct fpanel, lwidth(thick) color(blue) mcolor(black) msymbol(S)) ///
		(line rDNA_pct fpanel, lwidth(medthick) color(red) mcolor(gs10) msymbol(S)), ///
		yscale(range(0 0.35)) ylabel(0(.05)0.35) ///
		ytitle("Proportion of Unique Assignees") ///
		xscale(range(1 10)) xlabel(1(1)10) xtitle("Panel") ///
		graphregion(fcolor(white)) ///
		legend(label(1 "PCR") label(2 "rDNA")) ///
		legend(subtitle("Patent Application Trends"))
}
*** Figure 5 Panel C (Patents: Proportion of Unique US Classes)
{
	* For each technology, the share of its 10-panel total of topic that
	* falls in each fpanel (PCR: core == 1, rDNA: core == 0).
	clear all
	set more off
	* Forward slash keeps the path portable across operating systems.
	use "$directory/Patent Classes a.dta"
	drop if fpanel > 10
	* Technology-wide totals (denominators)
	egen PCR_tot = total(topic) if core == 1
	egen rDNA_tot = total(topic) if core == 0
	* Per-panel totals (numerators)
	bysort fpanel: egen PCR_yr_tot = total(topic) if core == 1
	bysort fpanel: egen rDNA_yr_tot = total(topic) if core == 0
	gen PCR_pct = PCR_yr_tot/PCR_tot
	gen rDNA_pct = rDNA_yr_tot/rDNA_tot
	* The shares are constant within fpanel, so the mean reduces to one row
	* per fpanel.
	collapse (mean) PCR_pct rDNA_pct, by(fpanel)
	twoway ///
		(line PCR_pct fpanel, lwidth(thick) color(blue) mcolor(black) msymbol(S)) ///
		(line rDNA_pct fpanel, mcolor(gs10) lwidth(medthick) color(red) msymbol(S)), ///
		yscale(range(0 0.4)) ylabel(0(.1)0.4) ///
		ytitle("Proportion of Unique US Classes") ///
		xscale(range(1 10)) xlabel(1(1)10) xtitle("Panel") ///
		graphregion(fcolor(white)) ///
		legend(label(1 "PCR") label(2 "rDNA")) ///
		legend(subtitle("Patent Application Trends"))
}

* END OF DO-FILE